Last Updated: Sep-06-2021

Introduction

Let’s do stuff with the Atlanta Crime Data. We’ll follow recommendations from codeforatlanta/apd-crime-data github.

Namley:

The following packages must be loaded:

library(readxl)
library(tidyr)
library(dplyr)
library(leaflet)
library(Hmisc)
library(lubridate)
library(maptools)
library(foreign)
library(here)

Get Data

First we’ll download the zip file. Commented lines were run once.

# temp <- tempfile(tmpdir = here::here())
# download.file(url = "http://www.atlantapd.org/files/crimedata/COBRA110416.zip"
              # ,destfile =  here::here("COBRA110416.zip"))
apdCrimeData <- read_excel(here::here("COBRA110416.xlsx")
  , sheet="Query")
# unlink(temp)

Let’s check and make sure each column is what we expect it to be:

apdCrimeData %>% str()
## tibble [261,976 × 23] (S3: tbl_df/tbl/data.frame)
##  $ MI_PRINX         : chr [1:261976] "1160569" "1160570" "1160572" "1160573" ...
##  $ offense_id       : chr [1:261976] "090360664" "090370891" "091681984" "072692336" ...
##  $ rpt_date         : chr [1:261976] "02/05/2009" "02/06/2009" "06/17/2009" "02/24/2010" ...
##  $ occur_date       : chr [1:261976] "02/03/2009" "02/06/2009" "06/17/2009" "02/24/2010" ...
##  $ occur_time       : chr [1:261976] "13:50:00" "08:50:00" "14:00:00" "23:29:00" ...
##  $ poss_date        : chr [1:261976] "02/03/2009" "02/06/2009" "06/17/2009" "02/24/2010" ...
##  $ poss_time        : chr [1:261976] "15:00:00" "10:45:00" "15:00:00" "23:30:00" ...
##  $ beat             : chr [1:261976] "305" "502" "604" "303" ...
##  $ apt_office_prefix: chr [1:261976] NA NA NA NA ...
##  $ apt_office_num   : chr [1:261976] NA NA "816" NA ...
##  $ location         : chr [1:261976] "55 MCDONOUGH BLVD SW" "464 ANSLEY WALK TER NW" "375 AUBURN AVE" "600 MARTIN ST" ...
##  $ MinOfucr         : chr [1:261976] "0670" "0640" "0670" "0420" ...
##  $ MinOfibr_code    : chr [1:261976] "2308" "2305" "2308" "1315K" ...
##  $ dispo_code       : chr [1:261976] NA NA NA NA ...
##  $ MaxOfnum_victims : chr [1:261976] "1" "1" "1" "1" ...
##  $ Shift            : chr [1:261976] "Day" "Day" "Day" "Morn" ...
##  $ Avg Day          : chr [1:261976] "Tue" "Fri" "Wed" "Wed" ...
##  $ loc_type         : chr [1:261976] "35" "18" NA "26" ...
##  $ UC2 Literal      : chr [1:261976] "LARCENY-NON VEHICLE" "LARCENY-FROM VEHICLE" "LARCENY-NON VEHICLE" "AGG ASSAULT" ...
##  $ neighborhood     : chr [1:261976] "South Atlanta" "Ansley Park" "Sweet Auburn" "Pittsburgh" ...
##  $ npu              : chr [1:261976] "Y" "E" "M" "V" ...
##  $ x                : chr [1:261976] "-84.386539999999997" "-84.37276" "-84.375209999999996" "-84.394599999999997" ...
##  $ y                : chr [1:261976] "33.720239999999997" "33.796849999999999" "33.755400000000002" "33.722119999999997" ...
names(apdCrimeData) <- gsub(" ", "_", names(apdCrimeData))
apdCrimeDataTidy <- apdCrimeData %>% 
  mutate(MI_PRINX = as.numeric(MI_PRINX),
         offense_id = as.numeric(offense_id),
         rpt_date = lubridate::as_date(rpt_date, format = "%m/%d/%Y"),
         occur_date = lubridate::as_date(occur_date, format = "%m/%d/%Y"),
         poss_date = lubridate::as_date(poss_date, format = "%m/%d/%Y"),
         x = as.numeric(x),
         y = as.numeric(y))

Let’s use pieces from codeforatlanta’s function to tidy this data:

errors_horiz_offset <- c(91350923, 91420511, 91471067, 91521689, 101540909, 
                           101701138, 111971638, 112090917, 112411694, 113130827, 
                           113221244, 113270554, 113531411, 113590628, 120230979, 
                           122561142, 130101490, 141621526, 142570818, 151362710)
errors_strange_date <- c(141260924)
errors_all <- c(errors_horiz_offset, errors_strange_date)
apdCrimeDataClean <- apdCrimeDataTidy %>% 
  filter(!(offense_id %in% errors_all)) 
apdCrimeDataErrors <- apdCrimeDataTidy %>% 
  filter(offense_id %in% errors_all) 

Let’s download the shapefiles directly from the atlanta crime data site:

# temp <- tempfile(tmpdir = here::here())
# download.file(url = "http://www.atlantapd.org/pdf/crime-data-downloads/1909FAB1-9E7F-4A34-8CDD-142D9DC83E7C.zip"
#               ,destfile =  here::here("1909FAB1-9E7F-4A34-8CDD-142D9DC83E7C.zip"))
# unlink(temp)
beats <- readShapeSpatial(here::here("1909FAB1-9E7F-4A34-8CDD-142D9DC83E7C","APD-Beats-070116_region.shp"))
## Warning: readShapeSpatial is deprecated; use rgdal::readOGR or sf::st_read
## Warning: readShapePoly is deprecated; use rgdal::readOGR or sf::st_read
zones <- readShapeSpatial(here::here("1909FAB1-9E7F-4A34-8CDD-142D9DC83E7C","APD-Zones-070116_region.shp"))
## Warning: readShapeSpatial is deprecated; use rgdal::readOGR or sf::st_read

## Warning: readShapePoly is deprecated; use rgdal::readOGR or sf::st_read

Now let’s add info from the .dbf files that came with the shapefiles.

beats_dbf <- read.dbf(here::here("1909FAB1-9E7F-4A34-8CDD-142D9DC83E7C","APD-Beats-070116_region.dbf"))
zones_dbf <- read.dbf(here::here("1909FAB1-9E7F-4A34-8CDD-142D9DC83E7C","APD-Zones-070116_region.dbf"))

Maps

Let’s filter the data in different ways. leaflet seems unable to handle too many points on a map.

First Map

Now let’s plot 100 crimes on a map in leaflet.

# describe(apdCrimeDataClean %>% select(x, y))
apdCrimeDataClean %>% 
  slice(1:100) %>% 
  leaflet() %>% 
  addTiles() %>% 
  addMarkers(lng = ~x, lat = ~y, popup = ~UC2_Literal)

Homicide map

How many crimes happened in 2016 alone?

apdCrimeDataClean %>% 
  filter(year(occur_date) == 2016) %>% 
  nrow()
## [1] 24220

That might be too much for a single leaflet plot. What are the frequencies of all the different types of crime?

apdCrimeDataClean %>% 
  group_by(UC2_Literal) %>% 
  summarise(freq = n()) %>% 
  ungroup()
## # A tibble: 11 × 2
##    UC2_Literal           freq
##  * <chr>                <int>
##  1 AGG ASSAULT          18507
##  2 AUTO THEFT           37095
##  3 BURGLARY-NONRES       8186
##  4 BURGLARY-RESIDENCE   41946
##  5 HOMICIDE               692
##  6 LARCENY-FROM VEHICLE 74316
##  7 LARCENY-NON VEHICLE  62641
##  8 RAPE                   935
##  9 ROBBERY-COMMERCIAL    1784
## 10 ROBBERY-PEDESTRIAN   14018
## 11 ROBBERY-RESIDENCE     1835

Let’s make a map of only homicides.

apdCrimeDataClean %>% 
  filter(UC2_Literal=="HOMICIDE") %>% 
  leaflet() %>% 
  addTiles() %>% 
  addCircles(lng = ~x, lat = ~y, popup = ~location, col="red")

Add Shapefiles

Now we’ll try to add shapefiles to our map. Let’s first try adding the beats.

apdCrimeDataClean %>% 
  filter(UC2_Literal=="HOMICIDE") %>% 
  leaflet() %>% 
  addTiles() %>% 
  addPolygons(data=beats, stroke=FALSE, fillOpacity = 0.5, smoothFactor=0.5,
              color="gray") %>% 
  addPolylines(data=beats, stroke=TRUE, fillOpacity = 0.5, smoothFactor=0.5,
              color="black") %>%
  addCircles(lng = ~x, lat = ~y, popup = ~location, col="red")

Now add zones.

apdCrimeDataClean %>% 
  filter(UC2_Literal=="HOMICIDE") %>% 
  leaflet() %>% 
  addTiles() %>% 
  addPolygons(data=zones, stroke=FALSE, fillOpacity = 0.5, smoothFactor=0.5,
              color="gray") %>% 
  addPolylines(data=zones, stroke=TRUE, fillOpacity = 0.5, smoothFactor=0.5,
              color="black") %>%
  addCircles(lng = ~x, lat = ~y, popup = ~location, col="red")

What if I add both zones and beats?

apdCrimeDataClean %>% 
  filter(UC2_Literal=="HOMICIDE") %>% 
  leaflet() %>% 
  addTiles() %>% 
  addPolygons(data=zones, stroke=FALSE, fillOpacity = 0.5, smoothFactor=0.5,
              color="gray") %>% 
  addPolylines(data=zones, stroke=TRUE, fillOpacity = 0.5, smoothFactor=0.5,
              color="blue") %>%
  addPolylines(data=beats, stroke=TRUE, fillOpacity = 0.5, smoothFactor=0.5,
              color="red") %>%
  addCircles(lng = ~x, lat = ~y, popup = ~location, col="red")

Color by Beat

Can we color the beats or zones by total number of crime type?

Homicide

homicidesByBeat <- apdCrimeDataClean %>% 
  filter(UC2_Literal=="HOMICIDE") %>% 
  group_by(beat) %>% 
  summarise(freq = n()) %>% 
  ungroup()
# heatCols <- heat.colors(nrow(homicidesByBeat))[cut(sort(homicidesByBeat$freq),nrow(homicidesByBeat))]
pal <- colorNumeric(
  # palette = "YlGnBu",
  # palette = "RdYlBu",
  palette = "YlOrRd",
  domain = homicidesByBeat$freq
)
apdCrimeDataClean %>% 
  # filter(UC2_Literal=="HOMICIDE") %>% 
  leaflet() %>% 
  addTiles() %>% 
  addPolylines(data=beats, stroke=TRUE, fillOpacity = 0.5, smoothFactor=0.5,
              color="black") %>%
  # addPolygons(data=beats, stroke=FALSE, fillOpacity = 0.5, smoothFactor=0.5,
              # fillColor=heatCols) %>% 
  addPolygons(data=beats, stroke=FALSE, fillOpacity = 0.5, smoothFactor = 0.5,
              color = ~pal(homicidesByBeat$freq)) %>% 
  addLegend("bottomright", pal = pal, values = homicidesByBeat$freq,
            title = "Legend",
            opacity = 1)

Rape

rapeByBeat <- apdCrimeDataClean %>% 
  filter(UC2_Literal=="RAPE") %>% 
  group_by(beat) %>% 
  summarise(freq = n()) %>% 
  ungroup()
# heatCols <- heat.colors(nrow(rapeByBeat))[cut(sort(rapeByBeat$freq),nrow(rapeByBeat))]
pal <- colorNumeric(
  palette = "YlGnBu",
  domain = rapeByBeat$freq
)
apdCrimeDataClean %>% 
  # filter(UC2_Literal=="RAPE") %>% 
  leaflet() %>% 
  addTiles() %>% 
  addPolylines(data=beats, stroke=TRUE, fillOpacity = 0.5, smoothFactor=0.5,
              color="black") %>%
  # addPolygons(data=beats, stroke=FALSE, fillOpacity = 0.5, smoothFactor=0.5,
              # fillColor=heatCols) %>% 
  addPolygons(data=beats, stroke=FALSE, fillOpacity = 0.5, smoothFactor = 0.5,
              color = ~pal(rapeByBeat$freq)) %>% 
  addLegend("bottomright", pal = pal, values = rapeByBeat$freq,
            title = "Legend",
            opacity = 1)

Session Info

sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] here_1.0.1        foreign_0.8-80    maptools_1.0-2    sp_1.4-5         
##  [5] lubridate_1.7.9.2 Hmisc_4.4-2       ggplot2_3.3.5     Formula_1.2-4    
##  [9] survival_3.2-7    lattice_0.20-41   leaflet_2.0.4.1   dplyr_1.0.3      
## [13] tidyr_1.1.2       readxl_1.3.1     
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_1.0.5          png_0.1-7           rprojroot_2.0.2    
##  [4] assertthat_0.2.1    digest_0.6.27       utf8_1.2.2         
##  [7] R6_2.5.1            cellranger_1.1.0    backports_1.2.1    
## [10] evaluate_0.14       pillar_1.6.2        rlang_0.4.11       
## [13] rstudioapi_0.13     data.table_1.13.6   rpart_4.1-15       
## [16] Matrix_1.2-18       checkmate_2.0.0     rmarkdown_2.6      
## [19] splines_4.0.3       stringr_1.4.0       htmlwidgets_1.5.3  
## [22] munsell_0.5.0       compiler_4.0.3      xfun_0.20          
## [25] pkgconfig_2.0.3     base64enc_0.1-3     rgeos_0.5-5        
## [28] htmltools_0.5.2     nnet_7.3-14         tidyselect_1.1.0   
## [31] tibble_3.1.4        gridExtra_2.3       htmlTable_2.1.0    
## [34] fansi_0.5.0         crayon_1.4.1        withr_2.4.2        
## [37] grid_4.0.3          jsonlite_1.7.2      gtable_0.3.0       
## [40] lifecycle_1.0.0     DBI_1.1.1           magrittr_2.0.1     
## [43] scales_1.1.1        cli_3.0.1           stringi_1.5.3      
## [46] farver_2.1.0        latticeExtra_0.6-29 ellipsis_0.3.2     
## [49] generics_0.1.0      vctrs_0.3.8         RColorBrewer_1.1-2 
## [52] tools_4.0.3         glue_1.4.2          purrr_0.3.4        
## [55] crosstalk_1.1.0.1   jpeg_0.1-8.1        fastmap_1.1.0      
## [58] yaml_2.2.1          colorspace_2.0-2    cluster_2.1.0      
## [61] knitr_1.30